#====# Appendix C: Constructing a placebo set of non-FCPA targets with matching #====#

# Load libraries and set defaults ----
library(cobalt)
library(ggplotify)
library(grid)
library(tidyverse)
library(tidylog, warn.conflicts = FALSE)
source("aux/plot_theme.R")

# Import data ----
# list with matching results; the elements used below are
# prop_score, whole_pool, coar_ex_ma and entropy_ba
match <- read_rds("data_out/matching.rds")

# Figure C.1: Standardized mean difference in covariates, FCPA targets and placebos (PSM) ----
# treatment indicator and covariates, shared by the two balance tables below:
psm_balance_formula <- FCPA_sample ~ as.factor(exchg) + as.factor(naics2) +
  atq + avg_dec + cshoq +
  n_unique_iso2 + n_subsid_nonUS + n_subsid_US + n_subsid +
  is_SP500

# balance table computed on match$prop_score
# (pooled SD as denominator, binary covariates standardized as well):
balance <- bal.tab(psm_balance_formula,
                   data = match$prop_score,
                   s.d.denom = "pooled", binary = "std",
                   un = TRUE, m.threshold = 0.25)

# identical specification, computed on match$whole_pool:
original <- bal.tab(psm_balance_formula,
                    data = match$whole_pool,
                    s.d.denom = "pooled", binary = "std",
                    un = TRUE, m.threshold = 0.25)

# Long-format rows for the plot: one standardized difference per covariate
# and sample (matching = 1: match$prop_score, matching = 0: match$whole_pool).
psm_rows <- data.frame(matching = rep(1, nrow(balance$Balance)),
                       std_diff = balance$Balance$Diff.Un,
                       var_name = rownames(balance$Balance))

# whole-pool rows, restricted to covariates that also appear in `balance`:
pool_rows <- data.frame(matching = rep(0, nrow(original$Balance)),
                        std_diff = original$Balance$Diff.Un,
                        var_name = rownames(original$Balance)) %>%
  filter(var_name %in% rownames(balance$Balance))

# NAICS-2 dummies in the order they are moved to the front of the y-axis levels:
naics_axis_order <- paste0("as.factor(naics2)_",
                           c(99, 72, 71, 62, 61, 56, 54, 53, 52, 51,
                             49, 48, 45, 42, 33, 32, 31, 23, 22, 21))

# lookup from bal.tab row names to the labels printed on the y-axis:
covariate_axis_labels <- c(
  "n_subsid" = "Number of subsidiaries",
  "n_subsid_US" = "Number of US subsidiaries",
  "n_subsid_nonUS" = "Number of foreign subsidiaries",
  "n_unique_iso2" = "Number of countries of foreign operations",
  "atq" = "Assets - Total",
  "capsq" = "Capital Surplus/Share Premium Reserve",
  "ceqq" = "Common/Ordinary Equity - Total",
  "ciq" = "Comprehensive Income - Total",
  "cshoq" = "Common Shares Outstanding",
  "epsf12" = "Earnings Per Share (Diluted)",
  "avg_dec" = "Average price at closing, December 2024",
  "dvi" = "Indicated annual dividend",
  "is_SP500" = "S&P 500 member",
  "as.factor(exchg)_11" = "NYSE",
  "as.factor(exchg)_14" = "NASDAQ-NMS",
  "as.factor(exchg)_19" = "Over-the-counter",
  "as.factor(naics2)_99" = "Nonclassifiable Establishments (99)",
  "as.factor(naics2)_71" = "Arts, Entertainment, and Recreation (71)",
  "as.factor(naics2)_72" = "Accommodation and Food Services (72)",
  "as.factor(naics2)_62" = "Health Care and Social Assistance (62)",
  "as.factor(naics2)_61" = "Educational Services (61)",
  "as.factor(naics2)_56" = "Administrative and Support Services (56)",
  "as.factor(naics2)_54" = "Professional, Scientific, and Technical Services (54)",
  "as.factor(naics2)_53" = "Real Estate and Rental and Leasing (53)",
  "as.factor(naics2)_52" = "Finance and Insurance (52)",
  "as.factor(naics2)_51" = "Information (51)",
  "as.factor(naics2)_49" = "Transportation and Warehousing (49)",
  "as.factor(naics2)_48" = "Transportation and Warehousing (48)",
  "as.factor(naics2)_45" = "Retail Trade (45)",
  "as.factor(naics2)_42" = "Wholesale Trade (42)",
  "as.factor(naics2)_33" = "Primary Metal Manufacturing (33)",
  "as.factor(naics2)_32" = "Nonmetallic Mineral Product Manufacturing (32)",
  "as.factor(naics2)_31" = "Animal Product and Food Manufacturing (31)",
  "as.factor(naics2)_23" = "Construction (23)",
  "as.factor(naics2)_22" = "Utilities (22)",
  "as.factor(naics2)_21" = "Mining, Quarrying, and Oil and Gas Extraction (21)"
)

p <- rbind(psm_rows, pool_rows) %>%
  mutate(
    # facet each covariate falls into; the two listed facets are moved to the front:
    type = fct_relevel(
      case_when(str_detect(var_name, "naics2") ~ "Industry classification\n(NAICS-2):",
                str_detect(var_name, "exchg") ~ "Exchange:",
                str_detect(var_name, "^n\\_") ~ "Subsidiary activities\n(US and foreign):",
                TRUE ~ "Financials:"),
      c("Subsidiary activities\n(US and foreign):", "Financials:")
    ),
    var_name = fct_relevel(var_name, naics_axis_order)
  ) %>%
  ggplot(aes(x = std_diff, y = var_name, shape = as.factor(matching))) +
  # dashed reference lines at the +/- 0.25 threshold and a grey one at zero:
  geom_vline(xintercept = c(-.25, .25), linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  geom_point(size = 2) +
  labs(x = "Standardized mean differences in covariates\nbetween past FCPA targets and other firms",
       y = "") +
  scale_shape_manual(name = "",
                     breaks = c(0, 1),
                     labels = c("FCPA targets\nvs all firms", "FCPA targets\nvs PS matches"),
                     values = c(4, 19)) +
  # one facet per covariate group, with the strips switched to the left:
  facet_grid(type ~ ., scales = "free_y", space = "free", switch = "y") +
  scale_y_discrete(labels = as_labeller(covariate_axis_labels)) +
  theme(
    panel.grid.major.y = element_line(linewidth = .1, color = "grey"),
    # strip boxes without background color or border:
    strip.background.x = element_rect(fill = NA, color = NA),
    strip.background.y = element_rect(fill = NA, color = NA),
    # strip text outside the axis, unclipped, horizontal and top-right aligned:
    strip.placement = "outside",
    strip.clip = "off",
    strip.text.y.left = element_text(angle = 0, vjust = 1, hjust = 1),
    # negative padding removes the space between left facet strip and axes:
    strip.switch.pad.grid = unit(-1.205, "cm")
  )

# Add a vertical rule next to each facet strip via grid, then save Figure C.1.
# Convert the plot to a gtable so that individual grobs can be replaced
# (named `c1_grob` rather than `q` to avoid shadowing base::q()):
c1_grob <- ggplotGrob(p)

# vertical line along the right edge (x = 1 npc) of a viewport, top to bottom:
strip_rule <- linesGrob(x = unit(c(1, 1), "npc"),
                        y = unit(c(1, 0), "npc"),
                        gp = gpar(col = "black", lwd = 0.4))

# swap the first child of every left-hand strip grob for the rule
# (presumably the strip background, which the theme leaves blank):
for (k in grep("strip-l", c1_grob$layout$name)) {
  c1_grob$grobs[[k]]$grobs[[1]]$children[[1]] <- strip_rule
}

# turn back into a ggplot, display it, and save it.
# The figure is passed to ggsave() explicitly: without `plot =`, ggsave()
# writes whatever last_plot() happens to hold at that moment.
figure_c1 <- as.ggplot(c1_grob)
figure_c1
ggsave("plots/figure_C1.pdf", plot = figure_c1, height = 6.5, width = 11)

# Figure C.2: Standardized mean difference in covariates, FCPA targets and placebos (CEM and entropy weighting) ----
# treatment indicator and covariates for the balance table on match$coar_ex_ma:
cem_balance_formula <- FCPA_sample ~ as.factor(exchg) + as.factor(naics2) +
  atq + avg_dec + cshoq +
  n_unique_iso2 + n_subsid_nonUS + n_subsid_US + n_subsid +
  is_SP500

# balance table computed on match$coar_ex_ma
# (pooled SD as denominator, binary covariates standardized as well):
coarsen <- bal.tab(cem_balance_formula,
                   data = match$coar_ex_ma,
                   s.d.denom = "pooled", binary = "std",
                   un = TRUE, m.threshold = 0.25)

# balance table for match$entropy_ba; the covariates are taken from
# match$whole_pool, with exchg and naics2 converted to factors:
entropy <- bal.tab(
  match$entropy_ba,
  covs = match$whole_pool %>%
    mutate(exchg = as.factor(exchg),
           naics2 = as.factor(naics2)) %>%
    select(exchg, naics2, atq, avg_dec, cshoq, n_unique_iso2, n_subsid_US, n_subsid_nonUS, n_subsid, is_SP500),
  s.d.denom = "pooled", binary = "std",
  un = TRUE, m.threshold = 0.25
)

# Long-format rows for the plot: one standardized difference per covariate
# and comparison (whole pool, match$coar_ex_ma sample, entropy weights).
unbalanced_rows <- data.frame(matching = rep("unbalanced", nrow(original$Balance)),
                              std_diff = original$Balance$Diff.Un,
                              var_name = rownames(original$Balance))

coarsened_rows <- data.frame(matching = rep("coarsened", nrow(coarsen$Balance)),
                             std_diff = coarsen$Balance$Diff.Un,
                             var_name = rownames(coarsen$Balance))

# NOTE(review): `entropy` is built from a `covs` data frame whose factor
# columns are called `exchg` / `naics2`, so its dummy rows may be named
# "exchg_11" / "naics2_21" instead of "as.factor(exchg)_11" /
# "as.factor(naics2)_21". Rows named that way would be dropped by the filter
# below and would have no axis label, so they are mapped onto the
# formula-style names here. This is a no-op when the names already match;
# confirm against rownames(entropy$Balance).
entropy_rows <- data.frame(matching = rep("entropy", nrow(entropy$Balance)),
                           std_diff = entropy$Balance$Diff.Adj, # adjusted (weighted) difference
                           var_name = str_replace(rownames(entropy$Balance),
                                                  "^(exchg|naics2)_",
                                                  "as.factor(\\1)_"))

# NAICS-2 dummies in the order they are moved to the front of the y-axis levels:
naics_axis_order <- paste0("as.factor(naics2)_",
                           c(99, 72, 71, 62, 61, 56, 54, 53, 52, 51,
                             49, 48, 45, 42, 33, 32, 31, 23, 22, 21))

# lookup from bal.tab row names to the labels printed on the y-axis:
covariate_axis_labels <- c(
  "n_subsid" = "Number of subsidiaries",
  "n_subsid_US" = "Number of US subsidiaries",
  "n_subsid_nonUS" = "Number of foreign subsidiaries",
  "n_unique_iso2" = "Number of countries of foreign operations",
  "atq" = "Assets - Total",
  "capsq" = "Capital Surplus/Share Premium Reserve",
  "ceqq" = "Common/Ordinary Equity - Total",
  "ciq" = "Comprehensive Income - Total",
  "cshoq" = "Common Shares Outstanding",
  "epsf12" = "Earnings Per Share (Diluted)",
  "avg_dec" = "Average price at closing, December 2024",
  "dvi" = "Indicated annual dividend",
  "is_SP500" = "S&P 500 member",
  "as.factor(exchg)_11" = "NYSE",
  "as.factor(exchg)_14" = "NASDAQ-NMS",
  "as.factor(exchg)_19" = "Over-the-counter",
  "as.factor(naics2)_99" = "Nonclassifiable Establishments (99)",
  "as.factor(naics2)_71" = "Arts, Entertainment, and Recreation (71)",
  "as.factor(naics2)_72" = "Accommodation and Food Services (72)",
  "as.factor(naics2)_62" = "Health Care and Social Assistance (62)",
  "as.factor(naics2)_61" = "Educational Services (61)",
  "as.factor(naics2)_56" = "Administrative and Support Services (56)",
  "as.factor(naics2)_54" = "Professional, Scientific, and Technical Services (54)",
  "as.factor(naics2)_53" = "Real Estate and Rental and Leasing (53)",
  "as.factor(naics2)_52" = "Finance and Insurance (52)",
  "as.factor(naics2)_51" = "Information (51)",
  "as.factor(naics2)_49" = "Transportation and Warehousing (49)",
  "as.factor(naics2)_48" = "Transportation and Warehousing (48)",
  "as.factor(naics2)_45" = "Retail Trade (45)",
  "as.factor(naics2)_42" = "Wholesale Trade (42)",
  "as.factor(naics2)_33" = "Primary Metal Manufacturing (33)",
  "as.factor(naics2)_32" = "Nonmetallic Mineral Product Manufacturing (32)",
  "as.factor(naics2)_31" = "Animal Product and Food Manufacturing (31)",
  "as.factor(naics2)_23" = "Construction (23)",
  "as.factor(naics2)_22" = "Utilities (22)",
  "as.factor(naics2)_21" = "Mining, Quarrying, and Oil and Gas Extraction (21)"
)

p <- rbind(unbalanced_rows, coarsened_rows, entropy_rows) %>%
  # keep only covariates that also appear in the PSM balance table `balance`:
  filter(var_name %in% rownames(balance$Balance)) %>%
  mutate(
    # facet each covariate falls into; the two listed facets are moved to the front:
    type = fct_relevel(
      case_when(str_detect(var_name, "naics2") ~ "Industry classification\n(NAICS-2):",
                str_detect(var_name, "exchg") ~ "Exchange:",
                str_detect(var_name, "^n\\_") ~ "Subsidiary activities\n(US and foreign):",
                TRUE ~ "Financials:"),
      c("Subsidiary activities\n(US and foreign):", "Financials:")
    ),
    var_name = fct_relevel(var_name, naics_axis_order)
  ) %>%
  ggplot(aes(x = std_diff, y = var_name, shape = as.factor(matching))) +
  # dashed reference lines at the +/- 0.25 threshold and a grey one at zero:
  geom_vline(xintercept = c(-.25, .25), linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  geom_point(size = 2, fill = "white") +
  labs(x = "Standardized mean differences in covariates\nbetween past FCPA targets and other firms",
       y = "") +
  scale_shape_manual(name = "",
                     breaks = c("unbalanced", "coarsened", "entropy"),
                     labels = c("FCPA targets\nvs all firms",
                                "FCPA targets\nvs CEM matches",
                                "FCPA targets\nvs all firms (entropy)"),
                     values = c(4, 17, 0)) +
  # one facet per covariate group, with the strips switched to the left:
  facet_grid(type ~ ., scales = "free_y", space = "free", switch = "y") +
  scale_y_discrete(labels = as_labeller(covariate_axis_labels)) +
  theme(
    panel.grid.major.y = element_line(linewidth = .1, color = "grey"),
    # strip boxes without background color or border:
    strip.background.x = element_rect(fill = NA, color = NA),
    strip.background.y = element_rect(fill = NA, color = NA),
    # strip text outside the axis, unclipped, horizontal and top-right aligned:
    strip.placement = "outside",
    strip.clip = "off",
    strip.text.y.left = element_text(angle = 0, vjust = 1, hjust = 1),
    # negative padding removes the space between left facet strip and axes:
    strip.switch.pad.grid = unit(-1.205, "cm")
  )

# Add a vertical rule next to each facet strip via grid, then save Figure C.2.
# Convert the plot to a gtable so that individual grobs can be replaced
# (named `c2_grob` rather than `q` to avoid shadowing base::q()):
c2_grob <- ggplotGrob(p)

# vertical line along the right edge (x = 1 npc) of a viewport, top to bottom:
strip_rule <- linesGrob(x = unit(c(1, 1), "npc"),
                        y = unit(c(1, 0), "npc"),
                        gp = gpar(col = "black", lwd = 0.4))

# swap the first child of every left-hand strip grob for the rule
# (presumably the strip background, which the theme leaves blank):
for (k in grep("strip-l", c2_grob$layout$name)) {
  c2_grob$grobs[[k]]$grobs[[1]]$children[[1]] <- strip_rule
}

# turn back into a ggplot, display it, and save it.
# The figure is passed to ggsave() explicitly: without `plot =`, ggsave()
# writes whatever last_plot() happens to hold at that moment.
figure_c2 <- as.ggplot(c2_grob)
figure_c2
ggsave("plots/figure_C2.pdf", plot = figure_c2, height = 6.5, width = 11)

#====# The End #====#